package snippets.cse524.activeDates;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import edu.umd.cloud9.collection.wikipedia.WikipediaPage;
import edu.umd.cloud9.collection.wikipedia.WikipediaPageInputFormat;
import edu.umd.cloud9.io.array.ArrayListWritable;
import edu.umd.cloud9.io.array.ArrayListWritableComparable;
import edu.umd.cloud9.io.pair.PairOfInts;
import edu.umd.cloud9.io.pair.PairOfStringInt;
import edu.umd.cloud9.io.pair.PairOfStrings;
import edu.umd.cloud9.io.pair.PairOfWritables;

/**
 * Hadoop job (old {@code org.apache.hadoop.mapred} API) that reads Wikipedia
 * pages and runs {@link WikipediaDateExtractor} on every article.
 *
 * <p>
 * Command line: {@code <inputPath> <outputPath> <threshold> <weighted>}. The
 * last two values are handed to each mapper through the job configuration
 * under the keys {@code "threshold"} and {@code "weighted"}.
 *
 * <p>
 * NOTE(review): the record emission in {@link Map#map} is disabled, so the
 * mapper currently emits no records. See the note inside that method.
 */
public class EntityActivityProfileThresholdedIntervals extends Configured
		implements Tool {

	/**
	 * Mapper over Wikipedia pages. Pages that are not articles, whose markup
	 * cannot be read, that the extractor does not consider an entity, or that
	 * have no birth date are skipped.
	 */
	public static class Map extends MapReduceBase implements
			Mapper<LongWritable, WikipediaPage, Text, Text> {

		/** Counter group used for the diagnostics reported by this mapper. */
		private static final String COUNTER_GROUP = "EntityActivityProfile";

		/**
		 * Not referenced anywhere in this file; kept only because it is a
		 * public field that external code may read.
		 */
		public int[] GAPS = { 1, 5, 10, 15, 20 };

		/** Copied into {@code WikipediaDateExtractor.cutOffThreshold}. */
		double threshold = 0.0;

		/** Copied into {@code WikipediaDateExtractor.adjustYearWeights}. */
		boolean weightedYears = false;

		/**
		 * Reads the {@code "threshold"} and {@code "weighted"} job settings.
		 *
		 * @param job
		 *            the job configuration
		 * @throws NumberFormatException
		 *             if {@code "threshold"} is set but is not a number
		 */
		@Override
		public void configure(JobConf job) {
			super.configure(job);

			// Parse the stored text directly as a double. Going through
			// getFloat() and widening turns e.g. 0.6 into 0.6000000238418579.
			String rawThreshold = job.get("threshold");
			threshold = (rawThreshold == null) ? 0.0 : Double
					.parseDouble(rawThreshold);
			weightedYears = job.getBoolean("weighted", false);

			System.out.println("Threshold, weighted: " + threshold + ", "
					+ weightedYears);
		}

		/**
		 * Runs the date extractor on one page and applies the entity and
		 * birth-date filters.
		 *
		 * @param key
		 *            input key supplied by the input format (unused)
		 * @param page
		 *            the Wikipedia page to analyse
		 * @param collector
		 *            output collector; not written to while the emission step
		 *            is disabled
		 * @param reporter
		 *            used to count pages whose markup could not be read
		 * @throws IOException
		 *             declared by the {@code Mapper} contract
		 */
		@Override
		public void map(LongWritable key, WikipediaPage page,
				OutputCollector<Text, Text> collector, Reporter reporter)
				throws IOException {
			if (!page.isArticle()) {
				return;
			}

			String content;
			try {
				content = page.getWikiMarkup();
			} catch (Exception e) {
				// Best effort: a page whose markup cannot be extracted is
				// skipped, but it is counted so the loss shows up in the job
				// counters instead of disappearing silently.
				if (reporter != null) {
					reporter.incrCounter(COUNTER_GROUP, "MARKUP_UNAVAILABLE", 1);
				}
				return;
			}

			WikipediaDateExtractor wde = new WikipediaDateExtractor(content,
					page.getTitle());
			wde.adjustYearWeights = weightedYears;
			wde.cutOffThreshold = threshold;

			if (!wde.isEntity()) {
				return;
			}

			GregorianCalendar birthDate = wde.getBirthdate();
			if (birthDate == null) {
				return;
			}

			// NOTE(review): no record is emitted. The emission step below was
			// already disabled and is kept for reference only; it is assumed
			// to depend on
			// WikipediaDateExtractor#getMinimumIntervalsForThresholds, which
			// is not visible from this file. Confirm it exists before
			// re-enabling. The previously computed birth/death year metadata
			// was never used by this step and has been removed.
			//
			// double mentions = wde.getLifeYearsMentions();
			// StringBuilder output = new StringBuilder();
			//
			// List<Double> thresholds = new ArrayList<Double>();
			// thresholds.add(0.6);
			// thresholds.add(0.7);
			// thresholds.add(0.8);
			// thresholds.add(0.9);
			// thresholds.add(0.95);
			// thresholds.add(1.0d);
			//
			// java.util.Map<Double, Pair<Integer, Integer>> minIntervals = wde
			// 		.getMinimumIntervalsForThresholds(thresholds);
			//
			// for (Double threshold : thresholds) {
			// 	if (!minIntervals.containsKey(threshold))
			// 		continue;
			//
			// 	Pair<Integer, Integer> minYears = minIntervals.get(threshold);
			//
			// 	output.append(Double.toString(threshold) + "\t\t"
			// 			+ minYears.getLeft() + "\t" + minYears.getRight()
			// 			+ "\t\t");
			// }
			//
			// collector.collect(new Text(mentions + "\t" + page.getTitle()),
			// 		new Text(output.toString()));
		}
	}

	/**
	 * Command-line entry point; terminates the JVM with the status returned
	 * by {@link #run(String[])}.
	 *
	 * @param args
	 *            generic Hadoop options followed by
	 *            {@code <inputPath> <outputPath> <threshold> <weighted>}
	 * @throws Exception
	 *             if the job cannot be configured or fails
	 */
	public static void main(String[] args) throws Exception {
		int exitCode = ToolRunner.run(
				new EntityActivityProfileThresholdedIntervals(), args);
		System.exit(exitCode);
	}

	/**
	 * Configures and runs the job.
	 *
	 * @param args
	 *            {@code <inputPath> <outputPath> <threshold> <weighted>}
	 * @return 0 when the job completes, -1 when too few arguments are given
	 * @throws NumberFormatException
	 *             if {@code <threshold>} is not a number
	 * @throws Exception
	 *             if the job fails
	 */
	@Override
	public int run(String[] args) throws Exception {
		if (args == null || args.length < 4) {
			System.err.println("Usage: "
					+ EntityActivityProfileThresholdedIntervals.class
							.getSimpleName()
					+ " <inputPath> <outputPath> <threshold> <weighted>");
			ToolRunner.printGenericCommandUsage(System.err);
			return -1;
		}

		String inputPath = args[0];
		String outputPath = args[1];
		// Parsed here so that a malformed value fails before job submission.
		double threshold = Double.parseDouble(args[2]);
		boolean weighted = Boolean.parseBoolean(args[3]);

		JobConf conf = new JobConf(getConf(),
				EntityActivityProfileThresholdedIntervals.class);
		conf.setJobName(String.format("EntityActivityProfile [%s: %s]",
				inputPath, outputPath));

		// Stored as text so the mapper can read it back at double precision.
		conf.set("threshold", Double.toString(threshold));
		conf.setBoolean("weighted", weighted);

		conf.setNumMapTasks(12);
		// No reducer class is set, so Hadoop's default reducer is used.
		conf.setNumReduceTasks(12);

		FileInputFormat.setInputPaths(conf, new Path(inputPath));
		FileOutputFormat.setOutputPath(conf, new Path(outputPath));

		conf.setInputFormat(WikipediaPageInputFormat.class);
		conf.setOutputFormat(TextOutputFormat.class);

		conf.setMapperClass(Map.class);

		conf.setMapOutputKeyClass(Text.class);
		conf.setMapOutputValueClass(Text.class);

		JobClient.runJob(conf);

		return 0;
	}

}
